In [23]:
%matplotlib inline
In [24]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
In [25]:
import csv
data = open('../data/data.csv', 'r').readlines()
fieldnames = ['x', 'y', 'z', 'unmasked', 'synapses']
reader = csv.reader(data)
reader.next()
rows = [[int(col) for col in row] for row in reader]
sorted_x = sorted(list(set([r[0] for r in rows])))
sorted_y = sorted(list(set([r[1] for r in rows])))
sorted_z = sorted(list(set([r[2] for r in rows])))
vol = np.zeros((len(sorted_x), len(sorted_y), len(sorted_z)))
for r in rows:
vol[sorted_x.index(r[0]), sorted_y.index(r[1]), sorted_z.index(r[2])] = r[-1]
In [26]:
y_sum = [0] * len(vol[0,:,0])
for i in range(len(vol[0,:,0])):
y_sum[i] = sum(sum(vol[:,i,:]))
In [27]:
ax = sns.barplot(x=range(len(y_sum)), y=y_sum, color="b")
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
Above, we see a histogram of y_sum
that indicates that there is a local minimum at the 12th layer of y-sampling, which colocates with where we anticipate seeing the boundary between layers I and II. Here is the biological substantiation:
As we can see, at about 1/3 of the 'depth' into cortex is the boundary to layer II.
In [28]:
from scipy.signal import argrelextrema
def local_minima(a):
return argrelextrema(a, np.less)
whole_volume_minima = local_minima(np.array(y_sum))
In [29]:
whole_volume_minima
Out[29]:
Now let's examine smaller chunks of the volume:
In [30]:
CHUNK_SIZE = 25
sections = [(i*CHUNK_SIZE, (i+1)*CHUNK_SIZE) for i in range(len(vol[:,0,0]) / CHUNK_SIZE)]
histogram = {}
for s in sections:
section = vol[s[0]:s[1]]
histogram[s] = [0] * len(vol[0,:,0])
for i in range(len(vol[0,:,0])):
histogram[s][i] = sum(sum(vol[s[0]:s[1],i,:]))
In [31]:
h_local_minima = []
for t, h in histogram.iteritems():
h_local_minima.extend([i for i in local_minima(np.array(h))])
In [32]:
total_histogram = [item for sublist in h_local_minima for item in sublist]
sns.distplot(total_histogram, bins=26)
Out[32]:
In [33]:
sns.distplot(total_histogram, bins=15)
Out[33]:
In [34]:
scatterable = []
i = 0
for h in h_local_minima:
[scatterable.append([i, m]) for m in h]
i += 1
plt.scatter(x=[s[0] for s in scatterable], y=[s[1] for s in scatterable])
Out[34]:
This coincides with our understanding that our sample space extends midway into layer 4, but covers all of layers 1, 2, and 3.
To play with these parameters, try changing
CHUNK_SIZE
to other values to change how small the sections of subcortex are.
In [35]:
from sklearn.cluster import KMeans
plt.scatter([0] * len(total_histogram), total_histogram)
Out[35]:
In [36]:
NUM_CLUSTERS = 3
k3cluster = KMeans(n_clusters=NUM_CLUSTERS)
total_histogram.sort()
clusters_for_th = k3cluster.fit_predict(np.array(total_histogram).reshape(-1, 1))
Now we can get the centroids from these clusters. I wish I understood what I was doing.
In [37]:
clusters = { n: [] for n in range(NUM_CLUSTERS) }
for i in range(len(total_histogram)):
clusters[clusters_for_th[i]].append(total_histogram[i])
In [38]:
clusters
Out[38]:
Now we can assume that the means of these clusters are the actual boundaries between cortex.
In [39]:
cluster_means = [np.mean(v) for _, v in clusters.iteritems() ]
In [40]:
cluster_means
Out[40]:
In [41]:
from PIL import Image
import urllib, cStringIO
file = cStringIO.StringIO(urllib.urlopen("http://openconnecto.me/ocp/ca/bock11/image/xy/7/350,850/50,936/2917/").read())
img = Image.open(file)
img_array = np.array(img)
In [46]:
cluster_means = np.array(cluster_means)
cluster_means_mapped = (cluster_means / vol.shape[1]) * img_array.shape[0]
for i in cluster_means_mapped:
img_array[[i, i+10, i-10], :] -= 50
In [47]:
Image.fromarray(img_array)
Out[47]:
To do this, we'll first flatten along the y-plane to find descending processes.
In [44]:
yflat = np.amax(vol, axis=1)
frame_y = pd.DataFrame(yflat)
sns.heatmap(frame_y)
Out[44]:
In [45]:
processkmeans = KMeans()
print yflat
processkmeans.fit_predict(yflat)
Out[45]: